


* ---------------------------------------------------------------------------
* Stage 1: stack every temp*.dta extract into one dataset in memory.
*
* filelist writes an index of matching files to stata_append.dta; the
* dirname and filename variables of that index are combined into a path for
* each file. Each file is loaded, tagged with its path in `source', parked
* in a tempfile, and finally all tempfiles are appended in index order.
* ---------------------------------------------------------------------------
cd D:\Dropbox\copyright_office\loc\

* Remove any stale index first; capture keeps going when it does not exist.
capture erase stata_append.dta
filelist, dir("[data dir]") pat("temp*.dta") save("stata_append.dta")

use "stata_append.dta", clear

local obs = _N

* Without this guard an empty index skips the loop below and the later
* -use- is handed an empty filename, which fails with an unhelpful message.
if `obs' == 0 {
	display as error "no temp*.dta files found by filelist; nothing to append"
	exit 601
}

forvalues i = 1/`obs' {
	* Re-read only row `i' of the index to recover that file's location.
	use "stata_append.dta" in `i', clear
	local f = dirname + "/" + filename

	use "`f'", clear
	gen source = "`f'"

	tempfile save`i'
	save "`save`i''"
}

use "`save1'", clear
forvalues i = 2/`obs' {
	* NOTE(review): force lets string/numeric type clashes through instead
	* of stopping; confirm the extracts really share one schema.
	append using "`save`i''", force
}

		 
		* Classification prefixes: the first character and the first two
		* characters of vtag050codea. The one-character prefix is the merge key.
		gen class  = substr(vtag050codea, 1, 1)
		gen class2 = substr(vtag050codea, 1, 2)

		* Many records share one class row in the lookup file.
		merge m:1 class using data\loc_class.dta

		* Retain only records whose class is present in both datasets.
		keep if _merge == 3
		drop _merge

		* NOTE(review): id and n are not used again in this script; presumably
		* leftover bookkeeping columns -- confirm.
		drop id n

		* Split vtag100codea into tokens name1, name2, ... on ", " and " ".
		rename vtag100codea name
		split name, parse(", " " ")

		* Discard tokens numbered 10 and above (two-character suffixes).
		drop name??

		* Remove commas and periods from the second and third tokens.
		foreach part in name2 name3 {
			replace `part' = subinstr(subinstr(`part', ",", "", .), ".", "", .)
		}

		* Split vtag100codeq into tokens nick1, nick2, ... on "(", " " and ")".
		rename vtag100codeq nick
		split nick, parse("(" " " ")")

		* First-name choice, in order of application:
		*   1. the second name token, without periods;
		*   2. overridden by nick2 whenever nick2 is non-empty;
		*   3. overridden by the third name token when the result so far is a
		*      single character and the third token is longer than one.
		* NOTE(review): assumes the token before the first comma is the surname.
		gen firstname = subinstr(name2, ".", "", .)
		replace firstname = nick2 if nick2 != ""
		replace firstname = name3 if length(firstname) == 1 & length(name3) > 1



		* Numeric year from vtag260codec: strip brackets, the letter c in
		* either case, and periods, then convert the first four remaining
		* characters. real() yields missing when those are not a number.
		tempvar ytxt
		gen `ytxt' = vtag260codec
		foreach junk in "[" "]" "c" "C" "." {
			replace `ytxt' = subinstr(`ytxt', "`junk'", "", .)
		}

		gen year = real(substr(`ytxt', 1, 4))
		drop `ytxt'




		* From here on `name' holds the upper-cased first name, replacing the
		* raw name string; it is the key for the gender lookup.
		drop name
		gen name = upper(firstname)
		drop firstname

		merge m:1 name using data\name_gender_wipo.dta

		* Lookup rows that match no record are not needed.
		drop if _merge == 2

		* Indicators: record has a first name / first name found in the lookup.
		gen dname  = name != ""
		gen dmatch = _merge == 3

		summarize dmatch dname

		* fshare is the complement of mshare, which is not created in this
		* script and so presumably comes from the lookup file.
		gen fshare = 1 - mshare
		table subject, c(count dname sum dname sum dmatch mean fshare)

		* Drop the raw vtag* fields and the single-digit name/nick tokens.
		drop vtag* name? nick? _merge
		save data\loc.dta, replace
 
 
 


******************************************
* ---------------------------------------------------------------------------
* Stage 2: collapse the record-level file to subject-by-year counts.
*
* Output variables (one row per subject and year):
*   N         number of records
*   Nname     records with a non-empty first name (sum of dname)
*   Nmatch    records whose first name matched the gender lookup (sum of dmatch)
*   Nfemale   sum of fshare over records where it is non-missing
*   fem_share Nfemale / Nmatch; missing when Nmatch is 0
* ---------------------------------------------------------------------------
* -use- takes the clear option; the original "replace" is rejected by Stata
* and stopped the script here.
use data\loc.dta, clear

* Records with a missing year fail the upper bound and are dropped as well.
keep if year > 1800 & year <= 2016

* Constant 1 so that summing it counts records per cell.
gen x = 1

* The former dgender indicator was removed: it was never referenced and
* collapse discarded it.
collapse (sum) N = x Nname = dname Nmatch = dmatch Nfemale = fshare, by(subject year)

gen fem_share = Nfemale/Nmatch

* NOTE(review): this is written to the working directory, not data\ like
* loc.dta -- left unchanged in case downstream code expects it here.
save loc_summary.dta, replace
